Add VMX memory-mapped Local APIC access optimization.
author kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Wed, 30 May 2007 15:48:28 +0000 (16:48 +0100)
committer kfraser@localhost.localdomain <kfraser@localhost.localdomain>
Wed, 30 May 2007 15:48:28 +0000 (16:48 +0100)
Some operating systems access the local APIC TPR very frequently, and
we handle that using software-based local APIC virtualization in Xen
today. Such virtualization incurs a number of VM exits from the
memory-access instructions against the APIC page in the guest.

The attached patch enables the TPR shadow feature that provides APIC
TPR virtualization in hardware. Our tests indicate it can
significantly boost the performance of such guests including 32-bit
Windows XP/2003.

Moreover, with the patch, local APIC accesses other than TPR in guests
are intercepted directly as APIC_ACCESS VM exits rather than
PAGE_FAULT VM exits; this can lower the emulation cost of such
accesses.

Signed-off-by: Dexuan Cui <dexuan.cui@intel.com>
xen/arch/x86/hvm/hvm.c
xen/arch/x86/hvm/vlapic.c
xen/arch/x86/hvm/vmx/intr.c
xen/arch/x86/hvm/vmx/vmcs.c
xen/arch/x86/hvm/vmx/vmx.c
xen/include/asm-x86/hvm/domain.h
xen/include/asm-x86/hvm/vlapic.h
xen/include/asm-x86/hvm/vmx/vmcs.h
xen/include/asm-x86/hvm/vmx/vmx.h
xen/include/asm-x86/msr.h

index 9a2135254371e1dceda9b107000c8b228f73bc11..c3e5ebe3e668179daa3159bd1d9139fbd8352967 100644 (file)
@@ -226,6 +226,7 @@ int hvm_domain_initialise(struct domain *d)
 
     spin_lock_init(&d->arch.hvm_domain.pbuf_lock);
     spin_lock_init(&d->arch.hvm_domain.irq_lock);
+    spin_lock_init(&d->arch.hvm_domain.vapic_access_lock);
 
     rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
     if ( rc != 0 )
index e9efdda2b4d59707c7f3e5d90751f6f5e0a61a76..19ab01499ad46adb646b3b8419a60a58fa6c36c9 100644 (file)
@@ -79,8 +79,6 @@ static unsigned int vlapic_lvt_mask[VLAPIC_LVT_NUM] =
 #define vlapic_lvtt_period(vlapic)                              \
     (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC)
 
-#define vlapic_base_address(vlapic)                             \
-    (vlapic->hw.apic_base_msr & MSR_IA32_APICBASE_BASE)
 
 /*
  * Generic APIC bitmap vector update & search routines.
index 9587ee14513ce4b1f7a6c22c9793399f1e735bd5..9119cfbf221cc84d5eb9f5320dfa97e000f7214b 100644 (file)
@@ -67,7 +67,6 @@ static inline int is_interruptibility_state(void)
     return __vmread(GUEST_INTERRUPTIBILITY_INFO);
 }
 
-#ifdef __x86_64__
 static void update_tpr_threshold(struct vlapic *vlapic)
 {
     int max_irr, tpr;
@@ -75,6 +74,11 @@ static void update_tpr_threshold(struct vlapic *vlapic)
     if ( !cpu_has_vmx_tpr_shadow )
         return;
 
+#ifdef __i386__
+    if ( !vlapic->mmap_vtpr_enabled )
+        return;
+#endif
+
     if ( !vlapic_enabled(vlapic) || 
          ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
     {
@@ -85,9 +89,6 @@ static void update_tpr_threshold(struct vlapic *vlapic)
     tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0;
     __vmwrite(TPR_THRESHOLD, (max_irr > tpr) ? (tpr >> 4) : (max_irr >> 4));
 }
-#else
-#define update_tpr_threshold(v) ((void)0)
-#endif
 
 asmlinkage void vmx_intr_assist(void)
 {
index 2f4c906e30e1262cee59866aa891aa8439145408..ee5f437797efd7ec22641eb8105205ba6fc1a00d 100644 (file)
@@ -40,6 +40,7 @@
 /* Dynamic (run-time adjusted) execution control flags. */
 u32 vmx_pin_based_exec_control __read_mostly;
 u32 vmx_cpu_based_exec_control __read_mostly;
+u32 vmx_secondary_exec_control __read_mostly;
 u32 vmx_vmexit_control __read_mostly;
 u32 vmx_vmentry_control __read_mostly;
 
@@ -60,11 +61,15 @@ static u32 adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr)
     return ctl;
 }
 
+/*
+ * True when the candidate CPU-based controls have ACTIVATE_SECONDARY_CONTROLS
+ * set. Expands to a test on _vmx_cpu_based_exec_control, a local variable of
+ * vmx_init_vmcs_config(), so it can only be used inside that function.
+ */
+#define vmx_has_secondary_exec_ctls \
+    (_vmx_cpu_based_exec_control & ACTIVATE_SECONDARY_CONTROLS)
+
 void vmx_init_vmcs_config(void)
 {
     u32 vmx_msr_low, vmx_msr_high, min, opt;
     u32 _vmx_pin_based_exec_control;
     u32 _vmx_cpu_based_exec_control;
+    u32 _vmx_secondary_exec_control = 0;
     u32 _vmx_vmexit_control;
     u32 _vmx_vmentry_control;
 
@@ -80,9 +85,8 @@ void vmx_init_vmcs_config(void)
            CPU_BASED_ACTIVATE_IO_BITMAP |
            CPU_BASED_USE_TSC_OFFSETING);
     opt = CPU_BASED_ACTIVATE_MSR_BITMAP;
-#ifdef __x86_64__
     opt |= CPU_BASED_TPR_SHADOW;
-#endif
+    opt |= ACTIVATE_SECONDARY_CONTROLS;
     _vmx_cpu_based_exec_control = adjust_vmx_controls(
         min, opt, MSR_IA32_VMX_PROCBASED_CTLS_MSR);
 #ifdef __x86_64__
@@ -92,8 +96,19 @@ void vmx_init_vmcs_config(void)
         _vmx_cpu_based_exec_control = adjust_vmx_controls(
             min, opt, MSR_IA32_VMX_PROCBASED_CTLS_MSR);
     }
+#elif defined(__i386__)
+    if ( !vmx_has_secondary_exec_ctls )
+        _vmx_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
 #endif
 
+    if ( vmx_has_secondary_exec_ctls )
+    {
+        min = 0;
+        opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+        _vmx_secondary_exec_control = adjust_vmx_controls(
+            min, opt, MSR_IA32_VMX_PROCBASED_CTLS2);
+    }
+
     min = VM_EXIT_ACK_INTR_ON_EXIT;
     opt = 0;
 #ifdef __x86_64__
@@ -113,6 +128,8 @@ void vmx_init_vmcs_config(void)
         vmcs_revision_id = vmx_msr_low;
         vmx_pin_based_exec_control = _vmx_pin_based_exec_control;
         vmx_cpu_based_exec_control = _vmx_cpu_based_exec_control;
+        if ( vmx_has_secondary_exec_ctls )
+            vmx_secondary_exec_control = _vmx_secondary_exec_control;
         vmx_vmexit_control         = _vmx_vmexit_control;
         vmx_vmentry_control        = _vmx_vmentry_control;
     }
@@ -121,6 +138,8 @@ void vmx_init_vmcs_config(void)
         BUG_ON(vmcs_revision_id != vmx_msr_low);
         BUG_ON(vmx_pin_based_exec_control != _vmx_pin_based_exec_control);
         BUG_ON(vmx_cpu_based_exec_control != _vmx_cpu_based_exec_control);
+        if ( vmx_has_secondary_exec_ctls )
+            BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
         BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
         BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
     }
@@ -291,6 +310,8 @@ static void construct_vmcs(struct vcpu *v)
     __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
     __vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmx_cpu_based_exec_control);
     v->arch.hvm_vcpu.u.vmx.exec_control = vmx_cpu_based_exec_control;
+    if ( vmx_cpu_based_exec_control & ACTIVATE_SECONDARY_CONTROLS )
+        __vmwrite(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control);
 
     if ( cpu_has_vmx_msr_bitmap )
         __vmwrite(MSR_BITMAP, virt_to_maddr(vmx_msr_bitmap));
@@ -417,7 +438,7 @@ static void construct_vmcs(struct vcpu *v)
     __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
 
 #ifdef __x86_64__ 
-    /* VLAPIC TPR optimisation. */
+    /* CR8 based VLAPIC TPR optimization. */
     if ( cpu_has_vmx_tpr_shadow )
     {
         __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
@@ -426,6 +447,16 @@ static void construct_vmcs(struct vcpu *v)
     }
 #endif
 
+    /* Memory-mapped based VLAPIC TPR optimization. */
+    if ( cpu_has_vmx_mmap_vtpr_optimization )
+    {
+        __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
+                    page_to_maddr(vcpu_vlapic(v)->regs_page));
+        __vmwrite(TPR_THRESHOLD, 0);
+
+        vcpu_vlapic(v)->mmap_vtpr_enabled = 1;
+    }
+
     __vmwrite(GUEST_LDTR_SELECTOR, 0);
     __vmwrite(GUEST_LDTR_BASE, 0);
     __vmwrite(GUEST_LDTR_LIMIT, 0);
@@ -496,6 +527,18 @@ void vmx_do_resume(struct vcpu *v)
         vmx_set_host_env(v);
     }
 
+    if ( !v->arch.hvm_vmx.launched && vcpu_vlapic(v)->mmap_vtpr_enabled )
+    {
+        struct page_info *pg = change_guest_physmap_for_vtpr(v->domain, 1);
+
+        if ( pg == NULL )
+        {
+            gdprintk(XENLOG_ERR, "change_guest_physmap_for_vtpr failed!\n");
+            domain_crash_synchronous();
+        }
+        __vmwrite(APIC_ACCESS_ADDR, page_to_maddr(pg));
+    }
+
     debug_state = v->domain->debugger_attached;
     if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
     {
index abe06dcf4344b3ce6d049564e3f4479406daa3b4..c77092204f19661fde5ca1686424bd9cb6da165f 100644 (file)
@@ -2483,6 +2483,114 @@ done:
     return 1;
 }
 
+/*
+ * Map or unmap the domain's APIC-access page at the guest frame of
+ * APIC_DEFAULT_PHYS_BASE.
+ *
+ * enable_vtpr != 0: allocate the page on first use (it is cached in
+ * d->arch.hvm_domain.apic_access_page), add it to the guest physmap and
+ * return it.  If the mapping is already in place the cached page is
+ * returned without further work.  Returns NULL if the allocation fails.
+ *
+ * enable_vtpr == 0: remove the mapping if it is present, call
+ * flush_tlb_mask() on d->domain_dirty_cpumask, and return NULL.  The page
+ * is not freed here; it stays cached for a later re-enable.
+ *
+ * The whole operation runs under d->arch.hvm_domain.vapic_access_lock.
+ */
+struct page_info * change_guest_physmap_for_vtpr(struct domain *d,
+                                                 int enable_vtpr)
+{
+    struct page_info *pg;
+    unsigned long pfn, mfn;
+
+    spin_lock(&d->arch.hvm_domain.vapic_access_lock);
+
+    pg = d->arch.hvm_domain.apic_access_page;
+    pfn = paddr_to_pfn(APIC_DEFAULT_PHYS_BASE);
+
+    if ( enable_vtpr )
+    {
+        /* Already mapped: hand back the cached page. */
+        if ( d->arch.hvm_domain.physmap_changed_for_vlapic_access )
+            goto out;
+
+        /* Reuse the page kept from an earlier enable, if there is one. */
+        if ( pg == NULL )
+            pg = alloc_domheap_page(d);
+        if ( pg == NULL )
+        {
+            /*
+             * NOTE(review): the message names alloc_domheap_pages() but the
+             * call above is alloc_domheap_page().
+             */
+            gdprintk(XENLOG_ERR, "alloc_domheap_pages() failed!\n");
+            goto out;
+        }
+
+        mfn = page_to_mfn(pg);
+        d->arch.hvm_domain.apic_access_page = pg;
+
+        guest_physmap_add_page(d, pfn, mfn);
+
+        d->arch.hvm_domain.physmap_changed_for_vlapic_access = 1;
+
+        goto out;
+    }
+    else
+    {
+        if ( d->arch.hvm_domain.physmap_changed_for_vlapic_access )
+        {
+            mfn = page_to_mfn(pg);
+            guest_physmap_remove_page(d, pfn, mfn);
+            flush_tlb_mask(d->domain_dirty_cpumask);
+
+            d->arch.hvm_domain.physmap_changed_for_vlapic_access = 0;
+        }
+        /* Disabling always reports NULL; apic_access_page keeps the page. */
+        pg = NULL;
+        goto out;
+    }
+
+out:
+    spin_unlock(&d->arch.hvm_domain.vapic_access_lock);
+    return pg;
+}
+
+/*
+ * Re-evaluate the memory-mapped vTPR optimization for vcpu v after its
+ * APIC base MSR has been updated (called from the MSR_IA32_APICBASE case
+ * of vmx_do_msr_write()).
+ *
+ * - vLAPIC hardware-disabled while the optimization is on: turn it off,
+ *   clear SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES in the VMCS and unmap
+ *   the APIC-access page.
+ * - vLAPIC not hardware-disabled, optimization off, and
+ *   cpu_has_vmx_mmap_vtpr_optimization holds: turn it on again, set the
+ *   execution controls and remap the APIC-access page.
+ *
+ * Finally, if the optimization is on and the APIC base is not
+ * APIC_DEFAULT_PHYS_BASE, log an error and call domain_crash_synchronous().
+ */
+static void check_vlapic_msr_for_vtpr(struct vcpu *v)
+{
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    /* Snapshot of the flag taken before either branch below updates it. */
+    int    mmap_vtpr_enabled = vcpu_vlapic(v)->mmap_vtpr_enabled;
+    uint32_t tmp;
+
+
+    if ( vlapic_hw_disabled(vlapic) && mmap_vtpr_enabled )
+    {
+        vcpu_vlapic(v)->mmap_vtpr_enabled = 0;    
+
+#ifdef __i386__
+        /* 32-bit build: TPR shadow is dropped along with the optimization. */
+        v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                  v->arch.hvm_vcpu.u.vmx.exec_control);
+#elif defined(__x86_64__)
+        /*
+         * NOTE(review): mmap_vtpr_enabled is only ever set when
+         * cpu_has_vmx_mmap_vtpr_optimization holds, which includes
+         * cpu_has_vmx_tpr_shadow, so this condition looks unreachable
+         * here -- confirm the intent.
+         */
+        if ( !cpu_has_vmx_tpr_shadow )
+        {
+            v->arch.hvm_vcpu.u.vmx.exec_control &= ~CPU_BASED_TPR_SHADOW;
+            __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                v->arch.hvm_vcpu.u.vmx.exec_control);
+        }
+#endif
+        tmp  = __vmread(SECONDARY_VM_EXEC_CONTROL);
+        tmp &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+        __vmwrite(SECONDARY_VM_EXEC_CONTROL, tmp);
+
+        change_guest_physmap_for_vtpr(v->domain, 0);
+    }
+    else if ( !vlapic_hw_disabled(vlapic) && !mmap_vtpr_enabled &&
+              cpu_has_vmx_mmap_vtpr_optimization )
+    {
+        vcpu_vlapic(v)->mmap_vtpr_enabled = 1;
+
+        v->arch.hvm_vcpu.u.vmx.exec_control |=
+            ( ACTIVATE_SECONDARY_CONTROLS | CPU_BASED_TPR_SHADOW );
+        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+                  v->arch.hvm_vcpu.u.vmx.exec_control);
+        tmp  = __vmread(SECONDARY_VM_EXEC_CONTROL);
+        tmp |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+        __vmwrite(SECONDARY_VM_EXEC_CONTROL, tmp);
+
+        /*
+         * NOTE(review): the returned page (NULL on allocation failure) is
+         * ignored and APIC_ACCESS_ADDR is not rewritten on this path.
+         */
+        change_guest_physmap_for_vtpr(v->domain, 1);
+    }
+
+    /* The optimization only handles the APIC at its default base address. */
+    if ( vcpu_vlapic(v)->mmap_vtpr_enabled &&
+        !vlapic_hw_disabled(vlapic) &&
+        (vlapic_base_address(vlapic) != APIC_DEFAULT_PHYS_BASE) )
+    {
+        gdprintk(XENLOG_ERR,
+                 "Local APIC base address is set to 0x%016"PRIx64"!\n",
+                  vlapic_base_address(vlapic));
+        domain_crash_synchronous();
+    }
+}
+
 static inline int vmx_do_msr_write(struct cpu_user_regs *regs)
 {
     u32 ecx = regs->ecx;
@@ -2511,6 +2619,7 @@ static inline int vmx_do_msr_write(struct cpu_user_regs *regs)
         break;
     case MSR_IA32_APICBASE:
         vlapic_msr_set(vcpu_vlapic(v), msr_content);
+        check_vlapic_msr_for_vtpr(v);
         break;
     default:
         if ( !long_mode_do_msr_write(regs) )
@@ -2823,6 +2932,15 @@ asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
 
     case EXIT_REASON_TPR_BELOW_THRESHOLD:
         break;
+    case EXIT_REASON_APIC_ACCESS:
+    {
+        unsigned long offset;
+
+        exit_qualification = __vmread(EXIT_QUALIFICATION);
+        offset = exit_qualification & 0x0fffUL;        
+        handle_mmio(APIC_DEFAULT_PHYS_BASE | offset);
+        break;
+    }
 
     default:
     exit_and_crash:
index 880e988b9611ed827c48a8368edf6bdeeecab7ee..191deac3814bd5b32543952337059201ca4b7fbf 100644 (file)
@@ -41,6 +41,11 @@ struct hvm_domain {
     s64                    tsc_frequency;
     struct pl_time         pl_time;
 
+    /* For memory-mapped vLAPIC/vTPR access optimization */
+    spinlock_t             vapic_access_lock;
+    int                    physmap_changed_for_vlapic_access : 1;
+    struct page_info       *apic_access_page;
+
     struct hvm_io_handler  io_handler;
 
     /* Lock protects access to irq, vpic and vioapic. */
index 6d174ebfd694f981df60dd52714ee323ce788c63..80cd32ec569b206969c0bf550f7b50e2cdb6dc5d 100644 (file)
 #define vlapic_disabled(vlapic)    ((vlapic)->hw.disabled)
 #define vlapic_enabled(vlapic)     (!vlapic_disabled(vlapic))
 
+/*
+ * hw.apic_base_msr masked with MSR_IA32_APICBASE_BASE.  The argument is
+ * parenthesized so that any pointer-valued expression can be passed.
+ */
+#define vlapic_base_address(vlapic)                             \
+    ((vlapic)->hw.apic_base_msr & MSR_IA32_APICBASE_BASE)
+
 struct vlapic {
     struct hvm_hw_lapic      hw;
     struct hvm_hw_lapic_regs *regs;
     struct periodic_time     pt;
     s_time_t                 timer_last_update;
     struct page_info         *regs_page;
+
+    int                      mmap_vtpr_enabled : 1;
 };
 
 static inline uint32_t vlapic_get_reg(struct vlapic *vlapic, uint32_t reg)
index 8584a8d3dbdc10b38e8bcb1080efe44108bd711c..449afd11cf2bfda19f7a70f300872f9e856ac5eb 100644 (file)
@@ -106,6 +106,7 @@ void vmx_vmcs_exit(struct vcpu *v);
 #define CPU_BASED_ACTIVATE_MSR_BITMAP   0x10000000
 #define CPU_BASED_MONITOR_EXITING       0x20000000
 #define CPU_BASED_PAUSE_EXITING         0x40000000
+#define ACTIVATE_SECONDARY_CONTROLS     0x80000000
 extern u32 vmx_cpu_based_exec_control;
 
 #define PIN_BASED_EXT_INTR_MASK         0x00000001
@@ -121,8 +122,16 @@ extern u32 vmx_vmexit_control;
 #define VM_ENTRY_DEACT_DUAL_MONITOR     0x00000800
 extern u32 vmx_vmentry_control;
 
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+extern u32 vmx_secondary_exec_control;
+
+#define cpu_has_vmx_virtualize_apic_accesses \
+    (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
 #define cpu_has_vmx_tpr_shadow \
     (vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)
+#define cpu_has_vmx_mmap_vtpr_optimization \
+    (cpu_has_vmx_virtualize_apic_accesses && cpu_has_vmx_tpr_shadow)
+
 #define cpu_has_vmx_msr_bitmap \
     (vmx_cpu_based_exec_control & CPU_BASED_ACTIVATE_MSR_BITMAP)
 extern char *vmx_msr_bitmap;
@@ -160,6 +169,8 @@ enum vmcs_field {
     TSC_OFFSET_HIGH                 = 0x00002011,
     VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
     VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+    APIC_ACCESS_ADDR                = 0x00002014,
+    APIC_ACCESS_ADDR_HIGH           = 0x00002015, 
     VMCS_LINK_POINTER               = 0x00002800,
     VMCS_LINK_POINTER_HIGH          = 0x00002801,
     GUEST_IA32_DEBUGCTL             = 0x00002802,
index e2a5b59cea7a8cd9ec97e45f4e60adc190f460e9..63af4bfe8f97983f49e1f36fffb1d1592f167e1a 100644 (file)
@@ -33,6 +33,9 @@ void vmx_intr_assist(void);
 void vmx_do_resume(struct vcpu *);
 void set_guest_time(struct vcpu *v, u64 gtime);
 
+extern struct page_info *change_guest_physmap_for_vtpr(struct domain *d,
+                                                       int enable_vtpr);
+
 /*
  * Exit Reasons
  */
@@ -81,6 +84,7 @@ void set_guest_time(struct vcpu *v, u64 gtime);
 #define EXIT_REASON_MACHINE_CHECK       41
 
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
 
 /*
  * Interruption-information format
index 3ed5265e6ec2e88dfe09997867debb8c5f6c02b8..2cd8b27952cd206565262a7975cd1e3b58550991 100644 (file)
@@ -119,6 +119,7 @@ static inline void wrmsrl(unsigned int msr, __u64 val)
 #define MSR_IA32_VMX_CR0_FIXED1                 0x487
 #define MSR_IA32_VMX_CR4_FIXED0                 0x488
 #define MSR_IA32_VMX_CR4_FIXED1                 0x489
+#define MSR_IA32_VMX_PROCBASED_CTLS2            0x48b
 #define IA32_FEATURE_CONTROL_MSR                0x3a
 #define IA32_FEATURE_CONTROL_MSR_LOCK           0x1
 #define IA32_FEATURE_CONTROL_MSR_ENABLE_VMXON   0x4